Resnet V0.¶
Import¶
In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm
from sklearn.model_selection import KFold
# GPU 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'사용 디바이스: {device}')
if torch.cuda.is_available():
print(f'GPU: {torch.cuda.get_device_name(0)}')
사용 디바이스: cuda GPU: NVIDIA GeForce RTX 4060 Laptop GPU
Data Load¶
In [2]:
# 하이퍼파라미터
img_size = 256
batch_size = 64
num_classes = 10
num_epochs = 50
# learning_rate = 0.0003 -> train section
num_workers = 0
version = "0.0"
# 데이터 경로
base_dir = r'c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj'
driver_csv_path = os.path.join(base_dir, 'data', 'driver_imgs_list.csv')
train_dir = os.path.join(base_dir, 'data', 'imgs', 'train')
test_dir = os.path.join(base_dir, 'data', 'imgs', 'test')
best_model_path = f'models/inception_v{version}.pth'
print(f"Train directory: {train_dir}")
print(f"Test directory: {test_dir}")
print(f"이미지 크기: {img_size}x{img_size}")
print(f"배치 크기: {batch_size}")
Train directory: c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\data\imgs\train Test directory: c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\data\imgs\test 이미지 크기: 256x256 배치 크기: 64
In [3]:
driver_df = pd.read_csv(driver_csv_path)
print(f"고유 운전자 수: {driver_df['subject'].nunique()}명")
print(f"운전자 목록: {sorted(driver_df['subject'].unique())}")
driver_counts = driver_df['subject'].value_counts().sort_index()
plt.figure(figsize=(12, 6))
plt.bar(driver_counts.index.astype(str), driver_counts.values, color='C0', alpha=0.9)
plt.xlabel('Driver')
plt.ylabel('Image Count')
plt.title('Images per Driver')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()
고유 운전자 수: 26명 운전자 목록: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072', 'p075', 'p081']
5-Fold Cross Validation¶
fold별 운전자 목록을 미리 나누긴 하되 초기 실험 단계에서는 한 폴드(fold 2, 학습 데이터가 가장 많아서..)만 사용하고 이후 마무리 단계에서 전체 폴드 다 돌려서 일반화 성능 끌어올리는 방향으로 진행.
split train data into 5 folds¶
In [4]:
all_drivers = sorted(driver_df['subject'].unique())
n_folds = 5
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)
fold_splits = []
for fold_idx, (train_indices, val_indices) in enumerate(kfold.split(all_drivers)):
train_drivers = [all_drivers[i] for i in train_indices]
val_drivers = [all_drivers[i] for i in val_indices]
fold_splits.append({
'fold': fold_idx+1,
'train_drivers': train_drivers,
'val_drivers': val_drivers
})
print("Fold", fold_idx+1)
print("train:", train_drivers, "val:", val_drivers)
train_imgs = driver_df[driver_df['subject'].isin(train_drivers)]
val_imgs = driver_df[driver_df['subject'].isin(val_drivers)]
print(f"학습 이미지: {len(train_imgs)}개")
print(f"검증 이미지: {len(val_imgs)}개")
Fold 1 train: ['p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p039', 'p042', 'p045', 'p047', 'p049', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p072', 'p081'] val: ['p002', 'p026', 'p035', 'p041', 'p050', 'p075'] 학습 이미지: 17446개 검증 이미지: 4978개 Fold 2 train: ['p002', 'p015', 'p016', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061', 'p064', 'p066', 'p075', 'p081'] val: ['p012', 'p014', 'p021', 'p045', 'p072'] 학습 이미지: 18418개 검증 이미지: 4006개 Fold 3 train: ['p002', 'p012', 'p014', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p045', 'p047', 'p050', 'p051', 'p052', 'p056', 'p061', 'p066', 'p072', 'p075', 'p081'] val: ['p015', 'p016', 'p042', 'p049', 'p064'] 학습 이미지: 18049개 검증 이미지: 4375개 Fold 4 train: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024', 'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049', 'p050', 'p056', 'p064', 'p072', 'p075'] val: ['p051', 'p052', 'p061', 'p066', 'p081'] 학습 이미지: 18098개 검증 이미지: 4326개 Fold 5 train: ['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p026', 'p035', 'p041', 'p042', 'p045', 'p049', 'p050', 'p051', 'p052', 'p061', 'p064', 'p066', 'p072', 'p075', 'p081'] val: ['p022', 'p024', 'p039', 'p047', 'p056'] 학습 이미지: 17685개 검증 이미지: 4739개
define DraiverDataset¶
In [5]:
class DriverDataset(Dataset):
"""운전자 행동 데이터셋"""
def __init__(self, data_dir, driver_df, driver_list, transform=None, is_test=False):
self.data_dir = data_dir
self.transform = transform
self.is_test = is_test
self.images = []
self.labels = []
if is_test:
test_images_dir = data_dir
for img_name in os.listdir(test_images_dir):
self.images.append(os.path.join(test_images_dir, img_name))
else: #is_train
driver_subset = driver_df[driver_df['subject'].isin(driver_list)]
for _, row in driver_subset.iterrows():
class_name = row['classname']
img_name = row['img']
img_path = os.path.join(data_dir, class_name, img_name)
self.images.append(img_path)
class_idx = int(class_name[1:])
self.labels.append(class_idx)
print(f"{'테스트' if is_test else '운전자' + str(len(driver_list))+'명'}, 데이터 {len(self.images)}개 이미지")
def __len__(self):
return len(self.images)
def __getitem__(self, idx):
img_path = self.images[idx]
image = Image.open(img_path).convert('RGB')
if self.transform:
image = self.transform(image)
if self.is_test:
return image, os.path.basename(img_path)
else:
label = self.labels[idx]
return image, label
define online team transform¶
In [6]:
team_transform_train = transforms.Compose([
transforms.Resize((img_size, img_size)),
transforms.RandomRotation(degrees=10),
transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
transforms.ToTensor(),
transforms.RandomErasing(p=0.25),
transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])
team_transform_eval = transforms.Compose([
transforms.Resize((img_size, img_size)),
transforms.ToTensor(),
transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
])
train¶
In [7]:
def train_epoch(model, train_loader, criterion, optimizer, device):
"""
한 에폭(Epoch) 동안 모델을 학습시키고 손실과 정확도를 반환합니다.
"""
model.train() # 모델을 학습 모드로 설정
running_loss = 0.0
correct_predictions = 0
total_samples = 0
# 훈련 데이터셋의 모든 배치를 반복합니다.
for inputs, labels in tqdm(train_loader, desc="Training"):
inputs = inputs.to(device)
labels = labels.to(device)
# 그래디언트 초기화
optimizer.zero_grad()
# 순전파 (Forward pass)
outputs = model(inputs)
loss = criterion(outputs, labels) # Log Loss (CrossEntropyLoss) 계산
# 역전파 및 최적화 (Backward pass and optimization)
loss.backward()
optimizer.step()
# 통계 업데이트
running_loss += loss.item() * inputs.size(0)
_, preds = torch.max(outputs, 1)
correct_predictions += torch.sum(preds == labels.data)
total_samples += inputs.size(0)
epoch_loss = running_loss / total_samples
epoch_acc = (correct_predictions.double() / total_samples) * 100
return epoch_loss, epoch_acc.item()
In [8]:
def validate(model, val_loader, criterion, device):
"""
검증 데이터셋을 이용해 모델의 성능(손실과 정확도)을 평가하고 반환합니다.
"""
model.eval() # 모델을 평가 모드로 설정
running_loss = 0.0
correct_predictions = 0
total_samples = 0
# 그래디언트 계산을 비활성화합니다.
with torch.no_grad():
for inputs, labels in tqdm(val_loader, desc="Validating"):
inputs = inputs.to(device)
labels = labels.to(device)
# 순전파
outputs = model(inputs)
loss = criterion(outputs, labels) # Log Loss (CrossEntropyLoss) 계산
# 통계 업데이트
running_loss += loss.item() * inputs.size(0)
_, preds = torch.max(outputs, 1)
correct_predictions += torch.sum(preds == labels.data)
total_samples += inputs.size(0)
epoch_loss = running_loss / total_samples
epoch_acc = (correct_predictions.double() / total_samples) * 100
# epoch_loss가 Log Loss에 해당하며, 이 값을 기준으로 최고 성능 모델을 저장합니다.
return epoch_loss, epoch_acc.item()
In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR
from torch.utils.data import DataLoader
import timm
import numpy as np
from sklearn.metrics import f1_score, log_loss
from tqdm import tqdm # ⭐ 추가
def train_fold(fold_idx, train_drivers, val_drivers):
"""
EXP-1: ResNet-50-D baseline (RGB only)
- Optimizer: AdamW
- Scheduler: Warmup(5ep) -> CosineAnnealing
- Loss: CrossEntropy(label_smoothing=0.1)
- Early Stop / Best Save: macro-F1
- 추가 로깅: multiclass logloss
"""
print(f"==== Fold {fold_idx}/{n_folds} ====")
# ========== 데이터셋 & 로더 ==========
train_dataset = DriverDataset(
train_dir, driver_df, train_drivers,
transform=team_transform_train, is_test=False
)
val_dataset = DriverDataset(
train_dir, driver_df, val_drivers,
transform=team_transform_eval, is_test=False
)
train_loader = DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=num_workers,
pin_memory=True
)
val_loader = DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
pin_memory=True
)
print(f"학습 배치 수: {len(train_loader)}")
print(f"검증 배치 수: {len(val_loader)}")
# ========== 모델 ==========
model = timm.create_model(
'resnet26d',
pretrained=True,
num_classes=num_classes
).to(device)
for p in model.parameters():
p.requires_grad = True
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print("\n" + "="*70)
print("📊 모델 파라미터 통계 (EXP-1: ResNet-26-D)")
print("="*70)
print(f" 전체 파라미터: {total_params:>15,}")
print(f" 학습 가능 파라미터: {trainable_params:>15,} ({100*trainable_params/total_params:>6.2f}%)")
print("="*70 + "\n")
# ========== 손실 & 옵티마이저 & 스케줄러 ==========
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(
model.parameters(),
lr=3e-4,
weight_decay=5e-2
)
warmup_epochs = 5
cosine_epochs = max(1, num_epochs - warmup_epochs)
scheduler = SequentialLR(
optimizer,
schedulers=[
LinearLR(optimizer, start_factor=0.01, end_factor=1.0, total_iters=warmup_epochs),
CosineAnnealingLR(optimizer, T_max=cosine_epochs)
],
milestones=[warmup_epochs]
)
# ========== 히스토리 ==========
history = {
'train_loss': [],
'train_acc': [],
'val_loss': [],
'val_acc': [],
'val_macro_f1': [],
'val_logloss': [],
'learning_rates': []
}
# ========== Early Stopping ==========
early_stop_patience = 15
patience_counter = 0
best_metric = -float('inf')
best_model_path = f'models/best_exp1_resnet50d_fold{fold_idx}.pth'
print(f"\n⏱️ Early Stopping Patience (macro-F1 기준): {early_stop_patience} ep\n")
print("="*70)
print("🚀 학습 시작 (EXP-1)")
print("="*70)
def eval_on_loader(model, loader, criterion):
"""검증 루프: loss/acc + macro-F1 + multiclass logloss 계산"""
model.eval()
total, correct, running_loss = 0, 0, 0.0
all_probs, all_labels = [], []
# ⭐ tqdm 추가
with torch.no_grad():
pbar = tqdm(loader, desc='Validating', leave=False)
for images, labels in pbar:
images = images.to(device, non_blocking=True)
labels = labels.to(device, non_blocking=True)
logits = model(images)
loss = criterion(logits, labels)
running_loss += loss.item() * labels.size(0)
preds = logits.argmax(dim=1)
correct += (preds == labels).sum().item()
total += labels.size(0)
probs = logits.softmax(dim=1).detach().cpu().numpy()
all_probs.append(probs)
all_labels.append(labels.detach().cpu().numpy())
# ⭐ 실시간 업데이트
current_acc = 100.0 * correct / total
pbar.set_postfix({
'loss': f'{loss.item():.4f}',
'acc': f'{current_acc:.2f}%'
})
avg_loss = running_loss / max(1, total)
acc = 100.0 * correct / max(1, total)
all_probs = np.concatenate(all_probs, axis=0)
all_labels = np.concatenate(all_labels, axis=0)
macro_f1 = f1_score(all_labels, np.argmax(all_probs, axis=1), average='macro')
mlogloss = log_loss(all_labels, all_probs, labels=list(range(num_classes)))
return avg_loss, acc, macro_f1, mlogloss
# ========== 에폭 루프 ==========
for epoch in range(num_epochs):
print(f'\n{"="*70}')
print(f'Epoch {epoch+1}/{num_epochs}')
print(f'{"="*70}')
current_lr = optimizer.param_groups[0]['lr']
history['learning_rates'].append(current_lr)
# ======= Train (⭐ tqdm 추가) =======
model.train()
train_loss, train_correct, train_total = 0.0, 0, 0
pbar = tqdm(train_loader, desc='Training', leave=False)
for images, labels in pbar:
images = images.to(device, non_blocking=True)
labels = labels.to(device, non_blocking=True)
optimizer.zero_grad(set_to_none=True)
logits = model(images)
loss = criterion(logits, labels)
loss.backward()
optimizer.step()
train_loss += loss.item() * labels.size(0)
preds = logits.argmax(dim=1)
train_correct += (preds == labels).sum().item()
train_total += labels.size(0)
# ⭐ 실시간 업데이트
current_acc = 100.0 * train_correct / train_total
pbar.set_postfix({
'loss': f'{loss.item():.4f}',
'acc': f'{current_acc:.2f}%',
'lr': f'{current_lr:.6f}'
})
epoch_train_loss = train_loss / max(1, train_total)
epoch_train_acc = 100.0 * train_correct / max(1, train_total)
# ======= Validate =======
val_loss, val_acc, val_macro_f1, val_logloss = eval_on_loader(model, val_loader, criterion)
scheduler.step()
history['train_loss'].append(epoch_train_loss)
history['train_acc'].append(epoch_train_acc)
history['val_loss'].append(val_loss)
history['val_acc'].append(val_acc)
history['val_macro_f1'].append(val_macro_f1)
history['val_logloss'].append(val_logloss)
print(f'\n📊 Epoch {epoch+1} 결과:')
print(f' Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.2f}%')
print(f' Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%')
print(f' Val Macro-F1: {val_macro_f1:.4f} | Val LogLoss: {val_logloss:.4f}')
print(f' LR: {current_lr:.6f}')
# ======= Best 저장 & Early Stopping =======
if val_macro_f1 > best_metric:
best_metric = val_macro_f1
patience_counter = 0
torch.save({
'fold': fold_idx,
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'history': history,
'val_loss': val_loss,
'val_acc': val_acc,
'val_macro_f1': val_macro_f1,
'val_logloss': val_logloss,
'model_name': 'resnet50d',
'freeze_mode': 'full_finetune',
'trainable_params': trainable_params,
'total_params': total_params,
}, best_model_path)
print(f' ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: {val_macro_f1:.4f})')
else:
patience_counter += 1
print(f' ⏳ Early Stopping 카운터: {patience_counter}/{early_stop_patience}')
if patience_counter >= early_stop_patience:
print(f'\n{"="*70}')
print(f'🛑 Early Stopping 발동! (Epoch {epoch+1}) — macro-F1 개선 없음')
print(f' 최고 Macro-F1: {best_metric:.4f}')
print(f' 모델 저장 경로: {best_model_path}')
print(f'{"="*70}')
break
final_epoch = epoch + 1
print("\n" + "="*70)
print(f"✅ Fold {fold_idx} 학습 완료! (EXP-1)")
print("="*70)
print(f" 총 학습 에폭: {final_epoch}/{num_epochs}")
print(f" 최고 Macro-F1: {max(history['val_macro_f1']):.4f}")
print(f" 최저 Val Loss: {min(history['val_loss']):.4f}")
print(f" 최저 LogLoss: {min(history['val_logloss']):.4f}")
print(f" 최고 Val Acc: {max(history['val_acc']):.2f}%")
print(f" 모델 저장: {best_model_path}")
print(f" 학습 파라미터: {trainable_params:,} / {total_params:,} ({100*trainable_params/total_params:.2f}%)")
print("="*70)
return {
'fold': fold_idx,
'history': history,
'best_macro_f1': max(history['val_macro_f1']),
'best_val_loss': min(history['val_loss']),
'best_val_logloss': min(history['val_logloss']),
'best_val_acc': max(history['val_acc']),
'model_path': best_model_path,
'stopped_epoch': final_epoch,
'model_name': 'resnet50d',
'freeze_mode': 'full_finetune',
'trainable_params': trainable_params,
'total_params': total_params
}
In [12]:
# ========== Fold 2 학습 (Inception-C부터 Fine-tuning) ==========
all_fold_results = []
fold_info = fold_splits[1] # Fold 2
fold_idx = fold_info['fold']
train_drivers = fold_info['train_drivers']
val_drivers = fold_info['val_drivers']
print("==== Resnet v0 ====")
# Inception-C부터 학습
fold_result = train_fold(
fold_idx,
train_drivers,
val_drivers,
)
all_fold_results.append(fold_result)
# 메모리 정리
torch.cuda.empty_cache()
# 결과 출력
print(f"\n{'='*70}")
print("📊 최종 결과")
print(f"{'='*70}")
print(f" Fold: {fold_result['fold']}")
print(f" 최저 Val Loss: {fold_result['best_val_loss']:.4f}")
print(f" 해당 Val Acc: {fold_result['best_val_acc']:.2f}%")
print(f" 최고 Val Acc: {fold_result['max_val_acc']:.2f}%")
print(f" 학습 완료 에폭: {fold_result['stopped_epoch']}")
print(f" Freeze 모드: {fold_result['freeze_mode']}")
print(f" 학습 파라미터: {fold_result['trainable_params']:,} / {fold_result['total_params']:,}")
print(f"{'='*70}")
==== Resnet v0 ==== ==== Fold 2/5 ==== 운전자21명, 데이터 18418개 이미지 운전자5명, 데이터 4006개 이미지 학습 배치 수: 288 검증 배치 수: 63 ====================================================================== 📊 모델 파라미터 통계 (EXP-1: ResNet-26-D) ====================================================================== 전체 파라미터: 13,985,898 학습 가능 파라미터: 13,985,898 (100.00%) ====================================================================== ⏱️ Early Stopping Patience (macro-F1 기준): 15 ep ====================================================================== 🚀 학습 시작 (EXP-1) ====================================================================== ====================================================================== Epoch 1/50 ======================================================================
📊 Epoch 1 결과: Train Loss: 2.2178 | Train Acc: 26.28% Val Loss: 2.1787 | Val Acc: 28.53% Val Macro-F1: 0.2455 | Val LogLoss: 2.1622 LR: 0.000003 ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.2455) ====================================================================== Epoch 2/50 ======================================================================
📊 Epoch 2 결과: Train Loss: 0.7937 | Train Acc: 91.48% Val Loss: 0.8684 | Val Acc: 86.52% Val Macro-F1: 0.8630 | Val LogLoss: 0.5725 LR: 0.000062 ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8630) ====================================================================== Epoch 3/50 ======================================================================
📊 Epoch 3 결과: Train Loss: 0.5743 | Train Acc: 97.96% Val Loss: 0.8373 | Val Acc: 86.77% Val Macro-F1: 0.8622 | Val LogLoss: 0.5223 LR: 0.000122 ⏳ Early Stopping 카운터: 1/15 ====================================================================== Epoch 4/50 ======================================================================
📊 Epoch 4 결과: Train Loss: 0.5540 | Train Acc: 98.65% Val Loss: 0.8325 | Val Acc: 86.84% Val Macro-F1: 0.8590 | Val LogLoss: 0.5249 LR: 0.000181 ⏳ Early Stopping 카운터: 2/15 ====================================================================== Epoch 5/50 ======================================================================
📊 Epoch 5 결과: Train Loss: 0.5501 | Train Acc: 98.81% Val Loss: 0.7860 | Val Acc: 89.67% Val Macro-F1: 0.8868 | Val LogLoss: 0.4598 LR: 0.000241 ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8868) ====================================================================== Epoch 6/50 ======================================================================
📊 Epoch 6 결과: Train Loss: 0.5421 | Train Acc: 99.04% Val Loss: 0.7929 | Val Acc: 88.94% Val Macro-F1: 0.8845 | Val LogLoss: 0.4727 LR: 0.000300 ⏳ Early Stopping 카운터: 1/15 ====================================================================== Epoch 7/50 ======================================================================
📊 Epoch 7 결과: Train Loss: 0.5355 | Train Acc: 99.23% Val Loss: 0.8588 | Val Acc: 85.65% Val Macro-F1: 0.8405 | Val LogLoss: 0.5455 LR: 0.000300 ⏳ Early Stopping 카운터: 2/15 ====================================================================== Epoch 8/50 ======================================================================
📊 Epoch 8 결과: Train Loss: 0.5291 | Train Acc: 99.34% Val Loss: 0.8047 | Val Acc: 88.17% Val Macro-F1: 0.8753 | Val LogLoss: 0.4823 LR: 0.000299 ⏳ Early Stopping 카운터: 3/15 ====================================================================== Epoch 9/50 ======================================================================
📊 Epoch 9 결과: Train Loss: 0.5254 | Train Acc: 99.51% Val Loss: 0.8158 | Val Acc: 88.64% Val Macro-F1: 0.8726 | Val LogLoss: 0.4962 LR: 0.000297 ⏳ Early Stopping 카운터: 4/15 ====================================================================== Epoch 10/50 ======================================================================
📊 Epoch 10 결과: Train Loss: 0.5232 | Train Acc: 99.51% Val Loss: 0.7592 | Val Acc: 90.09% Val Macro-F1: 0.8944 | Val LogLoss: 0.4386 LR: 0.000294 ✅ 최고 성능(매크로F1) 모델 저장! (Macro-F1: 0.8944) ====================================================================== Epoch 11/50 ======================================================================
📊 Epoch 11 결과: Train Loss: 0.5227 | Train Acc: 99.53% Val Loss: 0.8097 | Val Acc: 87.84% Val Macro-F1: 0.8716 | Val LogLoss: 0.4940 LR: 0.000291 ⏳ Early Stopping 카운터: 1/15 ====================================================================== Epoch 12/50 ======================================================================
📊 Epoch 12 결과: Train Loss: 0.5253 | Train Acc: 99.45% Val Loss: 0.8132 | Val Acc: 88.64% Val Macro-F1: 0.8722 | Val LogLoss: 0.4948 LR: 0.000287 ⏳ Early Stopping 카운터: 2/15 ====================================================================== Epoch 13/50 ======================================================================
📊 Epoch 13 결과: Train Loss: 0.5233 | Train Acc: 99.47% Val Loss: 0.8518 | Val Acc: 85.10% Val Macro-F1: 0.8468 | Val LogLoss: 0.5540 LR: 0.000282 ⏳ Early Stopping 카운터: 3/15 ====================================================================== Epoch 14/50 ======================================================================
📊 Epoch 14 결과: Train Loss: 0.5193 | Train Acc: 99.61% Val Loss: 0.8297 | Val Acc: 86.35% Val Macro-F1: 0.8540 | Val LogLoss: 0.5202 LR: 0.000277 ⏳ Early Stopping 카운터: 4/15 ====================================================================== Epoch 15/50 ======================================================================
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[12], line 15 11 print("==== Resnet v0 ====") 14 # Inception-C부터 학습 ---> 15 fold_result = train_fold( 16 fold_idx, 17 train_drivers, 18 val_drivers, 19 ) 21 all_fold_results.append(fold_result) 23 # 메모리 정리 Cell In[11], line 167, in train_fold(fold_idx, train_drivers, val_drivers) 164 train_loss, train_correct, train_total = 0.0, 0, 0 166 pbar = tqdm(train_loader, desc='Training', leave=False) --> 167 for images, labels in pbar: 168 images = images.to(device, non_blocking=True) 169 labels = labels.to(device, non_blocking=True) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\tqdm\std.py:1181, in tqdm.__iter__(self) 1178 time = self._time 1180 try: -> 1181 for obj in iterable: 1182 yield obj 1183 # Update and possibly print the progressbar. 1184 # Note: does not call self.update(1) for speed optimisation. File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\dataloader.py:732, in _BaseDataLoaderIter.__next__(self) 729 if self._sampler_iter is None: 730 # TODO(https://github.com/pytorch/pytorch/issues/76750) 731 self._reset() # type: ignore[call-arg] --> 732 data = self._next_data() 733 self._num_yielded += 1 734 if ( 735 self._dataset_kind == _DatasetKind.Iterable 736 and self._IterableDataset_len_called is not None 737 and self._num_yielded > self._IterableDataset_len_called 738 ): File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\dataloader.py:788, in _SingleProcessDataLoaderIter._next_data(self) 786 def _next_data(self): 787 index = self._next_index() # may raise StopIteration --> 788 data = self._dataset_fetcher.fetch(index) # may raise StopIteration 789 if self._pin_memory: 790 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\utils\data\_utils\fetch.py:52, in _MapDatasetFetcher.fetch(self, possibly_batched_index) 50 data = self.dataset.__getitems__(possibly_batched_index) 51 else: ---> 52 data = [self.dataset[idx] for idx in possibly_batched_index] 53 else: 54 data = self.dataset[possibly_batched_index] Cell In[5], line 36, in DriverDataset.__getitem__(self, idx) 33 image = Image.open(img_path).convert('RGB') 35 if self.transform: ---> 36 image = self.transform(image) 38 if self.is_test: 39 return image, os.path.basename(img_path) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\transforms.py:95, in Compose.__call__(self, img) 93 def __call__(self, img): 94 for t in self.transforms: ---> 95 img = t(img) 96 return img File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\nn\modules\module.py:1775, in Module._wrapped_call_impl(self, *args, **kwargs) 1773 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1774 else: -> 1775 return self._call_impl(*args, **kwargs) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torch\nn\modules\module.py:1786, in Module._call_impl(self, *args, **kwargs) 1781 # If we don't have any hooks, we want to skip the rest of the logic in 1782 # this function, and just call forward. 1783 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1784 or _global_backward_pre_hooks or _global_backward_hooks 1785 or _global_forward_hooks or _global_forward_pre_hooks): -> 1786 return forward_call(*args, **kwargs) 1788 result = None 1789 called_always_called_hooks = set() File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\transforms.py:1278, in ColorJitter.forward(self, img) 1276 img = F.adjust_contrast(img, contrast_factor) 1277 elif fn_id == 2 and saturation_factor is not None: -> 1278 img = F.adjust_saturation(img, saturation_factor) 1279 elif fn_id == 3 and hue_factor is not None: 1280 img = F.adjust_hue(img, hue_factor) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\torchvision\transforms\functional.py:912, in adjust_saturation(img, saturation_factor) 907 return F_pil.adjust_contrast(img, contrast_factor) 909 return F_t.adjust_contrast(img, contrast_factor) --> 912 def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: 913 """Adjust color saturation of an image. 914 915 Args: (...) 924 PIL Image or Tensor: Saturation adjusted image. 925 """ 926 if not torch.jit.is_scripting() and not torch.jit.is_tracing(): KeyboardInterrupt:
In [13]:
# ========== 학습 곡선 시각화 (4개 그래프) ==========
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
for result in all_fold_results:
fold_idx = result['fold']
history = result['history']
stopped_epoch = result['stopped_epoch']
best_val_loss = result['best_val_loss']
# 1. Loss 그래프
ax1 = axes[0, 0]
epochs = range(1, len(history['train_loss']) + 1)
ax1.plot(epochs, history['train_loss'], label='Train Loss',
marker='o', linewidth=2, alpha=0.8, color='#1f77b4')
ax1.plot(epochs, history['val_loss'], label='Val Loss',
marker='s', linewidth=2, alpha=0.8, color='#ff7f0e')
# 최저 Val Loss 지점 표시
best_epoch = np.argmin(history['val_loss']) + 1
ax1.scatter(best_epoch, best_val_loss, color='red', s=250, zorder=5,
marker='*', edgecolors='black', linewidths=2,
label=f'Best (Epoch {best_epoch})')
# Early Stopping 지점 표시
if stopped_epoch < num_epochs:
ax1.axvline(stopped_epoch, color='red', linestyle='--',
linewidth=2, alpha=0.5, label=f'Early Stop (E{stopped_epoch})')
ax1.set_title(f'Fold {fold_idx} - Loss (Multiclass Log Loss)',
fontsize=14, fontweight='bold')
ax1.set_xlabel('Epoch', fontsize=12)
ax1.set_ylabel('Loss', fontsize=12)
ax1.legend(loc='best', fontsize=10)
ax1.grid(True, alpha=0.3)
# 2. Accuracy 그래프
ax2 = axes[0, 1]
ax2.plot(epochs, history['train_acc'], label='Train Acc',
marker='o', linewidth=2, alpha=0.8, color='#2ca02c')
ax2.plot(epochs, history['val_acc'], label='Val Acc',
marker='s', linewidth=2, alpha=0.8, color='#d62728')
# 최고 Val Acc 지점 표시
best_acc_epoch = np.argmax(history['val_acc']) + 1
best_val_acc = max(history['val_acc'])
ax2.scatter(best_acc_epoch, best_val_acc, color='green', s=250, zorder=5,
marker='*', edgecolors='black', linewidths=2,
label=f'Best (Epoch {best_acc_epoch})')
if stopped_epoch < num_epochs:
ax2.axvline(stopped_epoch, color='red', linestyle='--',
linewidth=2, alpha=0.5, label=f'Early Stop (E{stopped_epoch})')
ax2.set_title(f'Fold {fold_idx} - Accuracy', fontsize=14, fontweight='bold')
ax2.set_xlabel('Epoch', fontsize=12)
ax2.set_ylabel('Accuracy (%)', fontsize=12)
ax2.legend(loc='best', fontsize=10)
ax2.grid(True, alpha=0.3)
# 3. Learning Rate 그래프
ax3 = axes[1, 0]
ax3.plot(epochs, history['learning_rates'], marker='o', linewidth=2,
color='purple', alpha=0.8, label='Learning Rate')
ax3.set_title(f'Fold {fold_idx} - Learning Rate Schedule',
fontsize=14, fontweight='bold')
ax3.set_xlabel('Epoch', fontsize=12)
ax3.set_ylabel('Learning Rate', fontsize=12)
ax3.set_yscale('log')
ax3.legend(loc='best', fontsize=10)
ax3.grid(True, alpha=0.3, which='both')
if stopped_epoch < num_epochs:
ax3.axvline(stopped_epoch, color='red', linestyle='--',
linewidth=2, alpha=0.5)
# 4. Train vs Val 비교 (Loss & Acc)
ax4 = axes[1, 1]
# Loss 차이
loss_diff = np.array(history['train_loss']) - np.array(history['val_loss'])
ax4_twin = ax4.twinx()
ax4.plot(epochs, loss_diff, marker='o', linewidth=2,
color='#e377c2', alpha=0.7, label='Loss Diff (Train - Val)')
ax4.axhline(0, color='gray', linestyle='--', linewidth=1)
ax4.set_xlabel('Epoch', fontsize=12)
ax4.set_ylabel('Loss Difference', fontsize=12, color='#e377c2')
ax4.tick_params(axis='y', labelcolor='#e377c2')
# Accuracy 차이
acc_diff = np.array(history['train_acc']) - np.array(history['val_acc'])
ax4_twin.plot(epochs, acc_diff, marker='s', linewidth=2,
color='#bcbd22', alpha=0.7, label='Acc Diff (Train - Val)')
ax4_twin.set_ylabel('Accuracy Difference (%)', fontsize=12, color='#bcbd22')
ax4_twin.tick_params(axis='y', labelcolor='#bcbd22')
ax4.set_title(f'Fold {fold_idx} - Overfitting 모니터링',
fontsize=14, fontweight='bold')
ax4.grid(True, alpha=0.3)
# Legend 통합
lines1, labels1 = ax4.get_legend_handles_labels()
lines2, labels2 = ax4_twin.get_legend_handles_labels()
ax4.legend(lines1 + lines2, labels1 + labels2, loc='best', fontsize=9)
if stopped_epoch < num_epochs:
ax4.axvline(stopped_epoch, color='red', linestyle='--',
linewidth=2, alpha=0.5)
plt.tight_layout()
plt.savefig(f'./plots/loss_curve/inception_v{version}_fold{fold_idx}_detailed.png',
dpi=300, bbox_inches='tight')
plt.show()
# ========== 통계 출력 ==========
print("\n" + "="*70)
print("📈 학습 통계 상세")
print("="*70)
print(f"초기 Train Loss: {history['train_loss'][0]:.4f}")
print(f"최종 Train Loss: {history['train_loss'][-1]:.4f}")
print(f"최저 Train Loss: {min(history['train_loss']):.4f} (Epoch {np.argmin(history['train_loss'])+1})")
print(f"\n초기 Val Loss: {history['val_loss'][0]:.4f}")
print(f"최종 Val Loss: {history['val_loss'][-1]:.4f}")
print(f"최저 Val Loss: {min(history['val_loss']):.4f} (Epoch {np.argmin(history['val_loss'])+1})")
print(f"\n초기 Train Acc: {history['train_acc'][0]:.2f}%")
print(f"최종 Train Acc: {history['train_acc'][-1]:.2f}%")
print(f"최고 Train Acc: {max(history['train_acc']):.2f}% (Epoch {np.argmax(history['train_acc'])+1})")
print(f"\n초기 Val Acc: {history['val_acc'][0]:.2f}%")
print(f"최종 Val Acc: {history['val_acc'][-1]:.2f}%")
print(f"최고 Val Acc: {max(history['val_acc']):.2f}% (Epoch {np.argmax(history['val_acc'])+1})")
print(f"\n초기 LR: {history['learning_rates'][0]:.6f}")
print(f"최종 LR: {history['learning_rates'][-1]:.6f}")
print(f"LR 변경 횟수: {len(set(history['learning_rates'])) - 1}회")
print("="*70)
# ========== Overfitting 분석 ==========
final_loss_gap = history['train_loss'][-1] - history['val_loss'][-1]
final_acc_gap = history['train_acc'][-1] - history['val_acc'][-1]
print("\n" + "="*70)
print("🔍 Overfitting 분석")
print("="*70)
print(f"최종 Loss 차이 (Train - Val): {final_loss_gap:+.4f}")
print(f"최종 Acc 차이 (Train - Val): {final_acc_gap:+.2f}%")
if final_acc_gap > 10:
print("⚠️ 경고: 심각한 Overfitting 감지! (Acc 차이 > 10%)")
elif final_acc_gap > 5:
print("⚠️ 주의: 약간의 Overfitting 감지 (Acc 차이 > 5%)")
else:
print("✅ 양호: Overfitting이 잘 제어되고 있습니다.")
print("="*70)
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) Cell In[13], line 112 108 ax4.axvline(stopped_epoch, color='red', linestyle='--', 109 linewidth=2, alpha=0.5) 111 plt.tight_layout() --> 112 plt.savefig(f'./plots/loss_curve/inception_v{version}_fold{fold_idx}_detailed.png', 113 dpi=300, bbox_inches='tight') 114 plt.show() 116 # ========== 통계 출력 ========== File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\pyplot.py:1250, in savefig(*args, **kwargs) 1247 fig = gcf() 1248 # savefig default implementation has no return, so mypy is unhappy 1249 # presumably this is here because subclasses can return? -> 1250 res = fig.savefig(*args, **kwargs) # type: ignore[func-returns-value] 1251 fig.canvas.draw_idle() # Need this if 'transparent=True', to reset colors. 1252 return res File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\figure.py:3490, in Figure.savefig(self, fname, transparent, **kwargs) 3488 for ax in self.axes: 3489 _recursively_make_axes_transparent(stack, ax) -> 3490 self.canvas.print_figure(fname, **kwargs) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backend_bases.py:2186, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs) 2182 try: 2183 # _get_renderer may change the figure dpi (as vector formats 2184 # force the figure dpi to 72), so we need to set it again here. 2185 with cbook._setattr_cm(self.figure, dpi=dpi): -> 2186 result = print_method( 2187 filename, 2188 facecolor=facecolor, 2189 edgecolor=edgecolor, 2190 orientation=orientation, 2191 bbox_inches_restore=_bbox_inches_restore, 2192 **kwargs) 2193 finally: 2194 if bbox_inches and restore_bbox: File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backend_bases.py:2042, in FigureCanvasBase._switch_canvas_and_return_print_method.<locals>.<lambda>(*args, **kwargs) 2038 optional_kws = { # Passed by print_figure for other renderers. 2039 "dpi", "facecolor", "edgecolor", "orientation", 2040 "bbox_inches_restore"} 2041 skip = optional_kws - {*inspect.signature(meth).parameters} -> 2042 print_method = functools.wraps(meth)(lambda *args, **kwargs: meth( 2043 *args, **{k: v for k, v in kwargs.items() if k not in skip})) 2044 else: # Let third-parties do as they see fit. 2045 print_method = meth File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backends\backend_agg.py:481, in FigureCanvasAgg.print_png(self, filename_or_obj, metadata, pil_kwargs) 434 def print_png(self, filename_or_obj, *, metadata=None, pil_kwargs=None): 435 """ 436 Write the figure to a PNG file. 437 (...) 479 *metadata*, including the default 'Software' key. 480 """ --> 481 self._print_pil(filename_or_obj, "png", pil_kwargs, metadata) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\backends\backend_agg.py:430, in FigureCanvasAgg._print_pil(self, filename_or_obj, fmt, pil_kwargs, metadata) 425 """ 426 Draw the canvas, then save it using `.image.imsave` (to which 427 *pil_kwargs* and *metadata* are forwarded). 428 """ 429 FigureCanvasAgg.draw(self) --> 430 mpl.image.imsave( 431 filename_or_obj, self.buffer_rgba(), format=fmt, origin="upper", 432 dpi=self.figure.dpi, metadata=metadata, pil_kwargs=pil_kwargs) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\matplotlib\image.py:1657, in imsave(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs) 1655 pil_kwargs.setdefault("format", format) 1656 pil_kwargs.setdefault("dpi", (dpi, dpi)) -> 1657 image.save(fname, **pil_kwargs) File c:\Users\USER\PycharmProjects\DeepLearning-Term-Proj\.venv\Lib\site-packages\PIL\Image.py:2583, in Image.save(self, fp, format, **params) 2581 fp = builtins.open(filename, "r+b") 2582 else: -> 2583 fp = builtins.open(filename, "w+b") 2584 else: 2585 fp = cast(IO[bytes], fp) FileNotFoundError: [Errno 2] No such file or directory: './plots/loss_curve/inception_v0.0_fold2_detailed.png'
Submission¶
In [23]:
# 학습 결과에서 Fold 2의 정보를 추출합니다.
# all_fold_results에는 현재 단 하나의 결과(Fold 2)만 들어있어야 합니다.
best_model_path = "./models/best_exp1_resnet50d_fold2.pth"
model = timm.create_model(
'resnet26d',
pretrained=True,
num_classes=num_classes
).to(device)
if not all_fold_results:
print("🚨 오류: 학습된 폴드 결과가 없습니다. train_fold 함수를 먼저 실행하세요.")
else:
result = all_fold_results[0]
fold_idx = result['fold']
model_path = result['model_path']
print("\n" + "=" * 70)
print(f"🔮 Fold {fold_idx} 단일 모델 예측 시작")
print("=" * 70)
# 테스트 데이터셋 생성
# test_dir은 'data/imgs/test'여야 합니다.
test_dataset = DriverDataset(
test_dir, driver_df, [],
transform=team_transform_eval, is_test=True
)
test_loader = DataLoader(
test_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers
)
print(f"테스트 샘플: {len(test_dataset)}개")
# --- 모델 로드 및 예측 ---
# 2. 저장된 최저 Loss 가중치 로드
print(f"\n📁 모델 로드 중: {model_path}...")
try:
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
print(f"✓ Fold {fold_idx} 모델 (Epoch {checkpoint['epoch']+1}, Val Loss: {checkpoint.get('val_loss', 'N/A'):.4f}) 로드 완료.")
except Exception as e:
print(f"🚨 모델 로드 실패: {e}")
# 이 시점에서 코드 실행을 멈추거나, 오류 처리를 진행할 수 있습니다.
raise
model = model.to(device)
model.eval() # 평가 모드 설정
# 3. 예측 실행
predictions = []
img_names = []
with torch.no_grad():
for images, filenames in tqdm(test_loader, desc=f'Fold {fold_idx} 예측'):
images = images.to(device)
outputs = model(images)
# 4. 확률 (Softmax) 계산
probs = torch.softmax(outputs, dim=1)
predictions.append(probs.cpu().numpy())
img_names.extend(filenames)
final_predictions = np.vstack(predictions)
print(f"\n✓ 예측 완료: {final_predictions.shape}")
# --- Submission 파일 생성 ---
# 클래스 이름을 c0, c1, ... c9로 설정
class_cols = [f'c{i}' for i in range(num_classes)]
submission_data = {'img': img_names}
for i, col in enumerate(class_cols):
submission_data[col] = final_predictions[:, i]
submission = pd.DataFrame(submission_data)
submission_file = f'inception_v{version}.csv'
submission.to_csv("./submissions/"+submission_file, index=False)
print("\n" + "=" * 70)
print(f"✅ Submission 파일 생성 완료: {submission_file}")
print(f"✅ 총 {len(submission)}개 이미지 예측")
print("=" * 70)
print("\n📋 Submission 샘플:")
print(submission.head())
🚨 오류: 학습된 폴드 결과가 없습니다. train_fold 함수를 먼저 실행하세요.
Analysis¶
In [ ]:
# 셀 추가: Confusion Matrix 분석
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
def analyze_predictions(model, val_loader, device):
"""검증 데이터로 상세 분석"""
model.eval()
all_preds = []
all_labels = []
all_probs = []
with torch.no_grad():
for images, labels in tqdm(val_loader, desc='예측 중'):
images = images.to(device)
outputs = model(images)
probs = torch.softmax(outputs, dim=1)
_, preds = torch.max(outputs, 1)
all_preds.extend(preds.cpu().numpy())
all_labels.extend(labels.cpu().numpy())
all_probs.append(probs.cpu().numpy())
all_probs = np.vstack(all_probs)
return np.array(all_labels), np.array(all_preds), all_probs
# 모델 로드
model = timm.create_model(
'resnet26d',
pretrained=True,
num_classes=num_classes
).to(device)
checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
try:
os.mkdir(f'./plots/resnet_v{version}')
except:
pass
# 예측
fold_info = fold_splits[1]
fold_idx = fold_info['fold']
train_drivers = fold_info['train_drivers']
val_drivers = fold_info['val_drivers']
val_dataset = DriverDataset(
train_dir, driver_df, val_drivers,
transform=team_transform_eval, is_test=False
)
val_loader = DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
num_workers=num_workers,
)
y_true, y_pred, y_probs = analyze_predictions(model, val_loader, device)
# 1. Confusion Matrix
class_names = [f'c{i}' for i in range(10)]
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=class_names, yticklabels=class_names,
cbar_kws={'label': 'Count'})
plt.title('Confusion Matrix', fontsize=16, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/confusion_matrix.png', dpi=300)
plt.show()
# 2. Normalized Confusion Matrix (비율)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(12, 10))
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='YlOrRd',
xticklabels=class_names, yticklabels=class_names)
plt.title('Normalized Confusion Matrix (%)', fontsize=16, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/confusion_matrix_normalized.png', dpi=300)
plt.show()
# 3. Classification Report
print("\n" + "="*70)
print("📊 Classification Report")
print("="*70)
print(classification_report(y_true, y_pred, target_names=class_names, digits=4))
운전자5명, 데이터 4006개 이미지
예측 중: 100%|██████████| 63/63 [00:22<00:00, 2.74it/s]
======================================================================
📊 Classification Report
======================================================================
precision recall f1-score support
c0 0.8033 0.7505 0.7760 457
c1 0.9976 0.8884 0.9398 466
c2 0.9977 0.9884 0.9930 430
c3 1.0000 0.9905 0.9952 423
c4 0.9184 0.9955 0.9554 441
c5 0.9524 0.9756 0.9639 410
c6 0.8785 0.9216 0.8995 408
c7 0.8966 0.9873 0.9398 316
c8 0.8049 0.6923 0.7444 286
c9 0.7093 0.7669 0.7370 369
accuracy 0.9009 4006
macro avg 0.8959 0.8957 0.8944 4006
weighted avg 0.9019 0.9009 0.9001 4006
In [26]:
# 클래스별 성능 분석
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
if 'y_true' not in globals() or 'y_pred' not in globals() or 'y_probs' not in globals():
print("y_true/y_pred/y_probs가 없음 — analyze_predictions 실행 중...")
y_true, y_pred, y_probs = analyze_predictions(model, val_loader, device)
else:
print("y_true/y_pred/y_probs 이미 존재, 재계산 생략")
precision, recall, f1, support = precision_recall_fscore_support(
y_true, y_pred, average=None
)
# 데이터프레임으로 정리
class_performance = pd.DataFrame({
'Class': class_names,
'Precision': precision,
'Recall': recall,
'F1-Score': f1,
'Support': support,
'Accuracy': [accuracy_score(y_true[y_true==i], y_pred[y_true==i])
if np.sum(y_true==i) > 0 else 0 for i in range(10)]
})
print("\n" + "="*70)
print("📈 클래스별 성능")
print("="*70)
print(class_performance.to_string(index=False))
# 시각화
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Precision
axes[0, 0].bar(class_names, precision, color='skyblue', alpha=0.8)
axes[0, 0].set_title('Precision by Class', fontweight='bold')
axes[0, 0].set_ylabel('Precision')
axes[0, 0].set_ylim([0, 1.1])
axes[0, 0].grid(axis='y', alpha=0.3)
# Recall
axes[0, 1].bar(class_names, recall, color='lightcoral', alpha=0.8)
axes[0, 1].set_title('Recall by Class', fontweight='bold')
axes[0, 1].set_ylabel('Recall')
axes[0, 1].set_ylim([0, 1.1])
axes[0, 1].grid(axis='y', alpha=0.3)
# F1-Score
axes[1, 0].bar(class_names, f1, color='lightgreen', alpha=0.8)
axes[1, 0].set_title('F1-Score by Class', fontweight='bold')
axes[1, 0].set_ylabel('F1-Score')
axes[1, 0].set_ylim([0, 1.1])
axes[1, 0].grid(axis='y', alpha=0.3)
# Support
axes[1, 1].bar(class_names, support, color='plum', alpha=0.8)
axes[1, 1].set_title('Support (Sample Count) by Class', fontweight='bold')
axes[1, 1].set_ylabel('Count')
axes[1, 1].grid(axis='y', alpha=0.3)
plt.tight_layout()
os.mkdir(f'plots/resnet_v{version}')
plt.savefig(f'plots/resnet_v{version}/class_performance.png', dpi=300)
plt.show()
y_true/y_pred/y_probs 이미 존재, 재계산 생략 ====================================================================== 📈 클래스별 성능 ====================================================================== Class Precision Recall F1-Score Support Accuracy c0 0.803279 0.750547 0.776018 457 0.750547 c1 0.997590 0.888412 0.939841 466 0.888412 c2 0.997653 0.988372 0.992991 430 0.988372 c3 1.000000 0.990544 0.995249 423 0.990544 c4 0.918410 0.995465 0.955386 441 0.995465 c5 0.952381 0.975610 0.963855 410 0.975610 c6 0.878505 0.921569 0.899522 408 0.921569 c7 0.896552 0.987342 0.939759 316 0.987342 c8 0.804878 0.692308 0.744361 286 0.692308 c9 0.709273 0.766938 0.736979 369 0.766938
In [30]:
# ROC Curve 분석
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
# One-hot encoding
y_true_bin = label_binarize(y_true, classes=range(10))
# 각 클래스별 ROC Curve
plt.figure(figsize=(14, 10))
for i in range(10):
fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_probs[:, i])
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=2, label=f'{class_names[i]} (AUC = {roc_auc:.3f})')
plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate', fontsize=12)
plt.ylabel('True Positive Rate', fontsize=12)
plt.title('ROC Curves (Multi-class)', fontsize=16, fontweight='bold')
plt.legend(loc='lower right', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/resnet_v{version}/roc_curves.png', dpi=300)
plt.show()
In [ ]:
In [32]:
# Grad-CAM 구현
import torch.nn.functional as F
from torchvision.transforms import ToPILImage
class GradCAM:
"""Grad-CAM (ResNet 호환)"""
def __init__(self, model, target_module=None):
self.model = model
self.target_module = target_module
self.gradients = None
self.activations = None
# target_module이 None이면 모델에서 마지막 Conv2d를 찾아 사용
if self.target_module is None:
self.target_module, _ = get_last_conv_layer_resnet(self.model)
if self.target_module is None:
raise RuntimeError("마지막 Conv 레이어를 찾을 수 없습니다.")
# 훅 등록 (forward / backward)
self.target_module.register_forward_hook(self._save_activation)
# 최신 PyTorch에서는 register_full_backward_hook 권장, 없으면 register_backward_hook 사용
if hasattr(self.target_module, "register_full_backward_hook"):
self.target_module.register_full_backward_hook(self._save_gradient)
else:
# 구버전 호환
self.target_module.register_backward_hook(self._save_gradient)
def _save_activation(self, module, input, output):
self.activations = output.detach()
def _save_gradient(self, module, grad_input, grad_output):
# grad_output는 tuple
self.gradients = grad_output[0].detach()
def generate_cam(self, input_tensor, target_class=None):
"""
input_tensor: [1, C, H, W] (이미 정규화된 텐서)
target_class: int or None (None이면 argmax 사용)
returns: cam (H, W) float numpy [0..1], logits tensor
"""
self.model.eval()
self.gradients = None
self.activations = None
logits = self.model(input_tensor) # forward
if target_class is None:
target_class = int(logits.argmax(dim=1)[0])
# backward on the score of target_class
self.model.zero_grad()
score = logits[0, target_class]
score.backward(retain_graph=True)
if self.gradients is None or self.activations is None:
raise RuntimeError("Gradients or activations not recorded. Check hooks.")
# gradients: [1, C, H, W] -> use [C, H, W]
gradients = self.gradients[0].cpu()
activations = self.activations[0].cpu()
# global average pooling of gradients -> weights [C]
weights = gradients.mean(dim=(1, 2)) # [C]
# weighted sum of activations
cam = (weights[:, None, None] * activations).sum(dim=0) # [H, W]
cam = F.relu(cam)
cam = cam - cam.min()
if cam.max() > 0:
cam = cam / cam.max()
cam_np = cam.numpy().astype(np.float32)
return cam_np, logits
def get_last_conv_layer_resnet(model):
"""
ResNet 계열 모델에서 마지막 nn.Conv2d 모듈을 찾아 반환
반환: (module, name) 또는 (None, None)
"""
for name, module in reversed(list(model.named_modules())):
if isinstance(module, torch.nn.Conv2d):
return module, name
return None, None
def visualize_gradcam(model, image, true_label, pred_label, device):
"""Grad-CAM 시각화 (ResNet)"""
# target layer 찾기/설정
target_module, target_name = get_last_conv_layer_resnet(model)
if target_module is None:
print("⚠️ 마지막 Conv 레이어를 찾을 수 없습니다.")
return
gradcam = GradCAM(model, target_module)
# 이미지는 [C, H, W] (정규화된 텐서)
input_tensor = image.unsqueeze(0).to(device)
cam, output = gradcam.generate_cam(input_tensor, target_class=pred_label)
# 원본 이미지 복원 (정규화 역변환)
img_np = image.cpu().numpy().transpose(1, 2, 0)
img_np = img_np * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_np = np.clip(img_np, 0, 1)
# cam은 [H, W], float [0..1] -> uint8로 변환 후 PIL 리사이즈
H, W = img_np.shape[:2]
cam_uint8 = np.uint8(255 * cam)
cam_pil = Image.fromarray(cam_uint8).resize((W, H), Image.BILINEAR)
cam_resized = np.array(cam_pil) / 255.0
# 시각화
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
axes[0].imshow(img_np)
axes[0].set_title(f'Original\nTrue: c{true_label}', fontsize=12)
axes[0].axis('off')
axes[1].imshow(img_np)
axes[1].imshow(cam_resized, cmap='jet', alpha=0.5)
axes[1].set_title(f'Grad-CAM\nPred: c{pred_label}', fontsize=12)
axes[1].axis('off')
axes[2].imshow(cam_resized, cmap='jet')
axes[2].set_title('Heatmap', fontsize=12)
axes[2].axis('off')
plt.tight_layout()
return fig
# 샘플 이미지에 Grad-CAM 적용
print("\n" + "="*70)
print("🔥 Grad-CAM 시각화")
print("="*70)
model = timm.create_model(
'resnet26d',
pretrained=True,
num_classes=num_classes
).to(device)
checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()
# 검증 데이터에서 샘플 추출 (정답/오답 각각)
correct_samples = []
incorrect_samples = []
for i, (images, labels) in enumerate(val_loader):
if len(correct_samples) >= 3 and len(incorrect_samples) >= 3:
break
images = images.to(device)
labels = labels.to(device)
with torch.no_grad():
outputs = model(images)
_, preds = torch.max(outputs, 1)
for j in range(len(images)):
if preds[j] == labels[j] and len(correct_samples) < 3:
correct_samples.append((images[j], labels[j].item(), preds[j].item()))
elif preds[j] != labels[j] and len(incorrect_samples) < 3:
incorrect_samples.append((images[j], labels[j].item(), preds[j].item()))
# 정답 샘플 시각화
print("\n✅ 정답 예측 샘플")
for idx, (img, true_label, pred_label) in enumerate(correct_samples):
fig = visualize_gradcam(model, img, true_label, pred_label, device)
plt.savefig(f'plots/resnet_v{version}/gradcam_correct_{idx}.png', dpi=300, bbox_inches='tight')
plt.show()
# 오답 샘플 시각화
print("\n❌ 오답 예측 샘플")
for idx, (img, true_label, pred_label) in enumerate(incorrect_samples):
fig = visualize_gradcam(model, img, true_label, pred_label, device)
plt.savefig(f'plots/resnet_v{version}/gradcam_incorrect_{idx}.png', dpi=300, bbox_inches='tight')
plt.show()
====================================================================== 🔥 Grad-CAM 시각화 ====================================================================== ✅ 정답 예측 샘플
❌ 오답 예측 샘플
In [35]:
# Feature Map 추출 및 시각화
class FeatureExtractor:
"""중간 레이어의 Feature Map 추출"""
def __init__(self, model, layer_names):
self.model = model
self.layer_names = layer_names
self.features = {}
# Hook 등록
for name, layer in model.named_modules():
if name in layer_names:
layer.register_forward_hook(self.save_feature(name))
def save_feature(self, name):
def hook(module, input, output):
self.features[name] = output.detach()
return hook
def extract(self, x):
self.features = {}
_ = self.model(x)
return self.features
def visualize_feature_maps(features, layer_name, max_channels=16):
"""Feature Map 시각화"""
feature = features[layer_name][0] # 첫 번째 배치
num_channels = min(feature.shape[0], max_channels)
# Grid 크기 계산
grid_size = int(np.ceil(np.sqrt(num_channels)))
fig, axes = plt.subplots(grid_size, grid_size, figsize=(15, 15))
axes = axes.flatten()
for i in range(num_channels):
fmap = feature[i].cpu().numpy()
axes[i].imshow(fmap, cmap='viridis')
axes[i].set_title(f'Ch {i}', fontsize=8)
axes[i].axis('off')
# 빈 subplot 숨기기
for i in range(num_channels, len(axes)):
axes[i].axis('off')
plt.suptitle(f'Feature Maps: {layer_name}', fontsize=16, fontweight='bold')
plt.tight_layout()
return fig
# Inception V4의 주요 레이어 이름 확인
print("\n" + "="*70)
print("🔍 모델 구조 탐색")
print("="*70)
for name, module in model.named_modules():
if isinstance(module, nn.Conv2d):
print(f"Conv Layer: {name}")
# 주요 레이어 선택 (예시)
target_layers = [
'conv1.6', # 첫 번째 Conv
'layer1.0.conv3', # 중간 블록
'layer2.0.conv3', # 후반 블록
'layer3.0.conv3',
'layer4.0.conv3',
'layer4.1.conv3'
]
# 샘플 이미지로 Feature Map 추출
sample_image, sample_label = next(iter(val_loader))
sample_image = sample_image[0:1].to(device) # 첫 번째 이미지
extractor = FeatureExtractor(model, target_layers)
features = extractor.extract(sample_image)
# 각 레이어 시각화
for layer_name in target_layers:
if layer_name in features:
fig = visualize_feature_maps(features, layer_name, max_channels=16)
plt.savefig(f'plots/resnet_v{version}/feature_map_{layer_name.replace(".", "_")}.png', dpi=300)
plt.show()
====================================================================== 🔍 모델 구조 탐색 ====================================================================== Conv Layer: conv1.0 Conv Layer: conv1.3 Conv Layer: conv1.6 Conv Layer: layer1.0.conv1 Conv Layer: layer1.0.conv2 Conv Layer: layer1.0.conv3 Conv Layer: layer1.0.downsample.1 Conv Layer: layer1.1.conv1 Conv Layer: layer1.1.conv2 Conv Layer: layer1.1.conv3 Conv Layer: layer2.0.conv1 Conv Layer: layer2.0.conv2 Conv Layer: layer2.0.conv3 Conv Layer: layer2.0.downsample.1 Conv Layer: layer2.1.conv1 Conv Layer: layer2.1.conv2 Conv Layer: layer2.1.conv3 Conv Layer: layer3.0.conv1 Conv Layer: layer3.0.conv2 Conv Layer: layer3.0.conv3 Conv Layer: layer3.0.downsample.1 Conv Layer: layer3.1.conv1 Conv Layer: layer3.1.conv2 Conv Layer: layer3.1.conv3 Conv Layer: layer4.0.conv1 Conv Layer: layer4.0.conv2 Conv Layer: layer4.0.conv3 Conv Layer: layer4.0.downsample.1 Conv Layer: layer4.1.conv1 Conv Layer: layer4.1.conv2 Conv Layer: layer4.1.conv3
In [ ]:
# t-SNE/UMAP으로 임베딩 시각화
from sklearn.manifold import TSNE
# pip install umap-learn
# from umap import UMAP
def extract_embeddings(model, dataloader, device):
"""마지막 FC 레이어 이전의 임베딩 추출"""
model.eval()
embeddings = []
labels = []
# FC 레이어 이전 출력 추출을 위한 Hook
features = []
def hook(module, input, output):
features.append(input[0].detach())
# Inception V4의 마지막 FC 레이어에 Hook
if hasattr(model, 'last_linear'):
handle = model.last_linear.register_forward_hook(hook)
elif hasattr(model, 'fc'):
handle = model.fc.register_forward_hook(hook)
with torch.no_grad():
for images, lbls in tqdm(dataloader, desc='임베딩 추출'):
images = images.to(device)
_ = model(images)
embeddings.append(features[-1].cpu().numpy())
labels.extend(lbls.numpy())
features.clear()
handle.remove()
embeddings = np.vstack(embeddings)
labels = np.array(labels)
return embeddings, labels
# 임베딩 추출
print("\n" + "="*70)
print("🧬 임베딩 추출 중...")
print("="*70)
embeddings, labels = extract_embeddings(model, val_loader, device)
print(f"임베딩 shape: {embeddings.shape}")
# t-SNE 시각화
print("\n📊 t-SNE 계산 중...")
tsne = TSNE(n_components=2, random_state=42, perplexity=30, max_iter=1000)
embeddings_2d = tsne.fit_transform(embeddings)
plt.figure(figsize=(14, 10))
scatter = plt.scatter(
embeddings_2d[:, 0],
embeddings_2d[:, 1],
c=labels,
cmap='tab10',
s=10,
alpha=0.6
)
plt.colorbar(scatter, label='Class', ticks=range(10))
plt.title('t-SNE Visualization of Learned Embeddings', fontsize=16, fontweight='bold')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/inception_v{version}/tsne_embeddings.png', dpi=300)
plt.show()
# 클래스별로 색상 구분하여 Legend 추가
plt.figure(figsize=(16, 12))
for class_idx in range(10):
mask = labels == class_idx
plt.scatter(
embeddings_2d[mask, 0],
embeddings_2d[mask, 1],
label=f'c{class_idx}',
s=20,
alpha=0.7
)
plt.title('t-SNE Visualization by Class', fontsize=16, fontweight='bold')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.legend(loc='best', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig(f'plots/inception_v{version}/tsne_embeddings_by_class.png', dpi=300)
plt.show()
====================================================================== 🧬 임베딩 추출 중... ======================================================================
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[20], line 44 41 print("🧬 임베딩 추출 중...") 42 print("="*70) ---> 44 embeddings, labels = extract_embeddings(model, val_loader, device) 45 print(f"임베딩 shape: {embeddings.shape}") 47 # t-SNE 시각화 NameError: name 'model' is not defined
In [ ]:
In [ ]: